discard

* Session setup: suppress -more- pauses and load the wave-7 health variables.
set more off
use "$healthsave\basic healthvars_w7.dta", clear


 /** indicators describing how an obs has been cleaned **/

	* Value label attached to every howcl* indicator generated below.
	label define howcl                                               ///
		-2 "ff error - no symptoms BUT date of diagnosis reported"   ///
		-1 "n/a"                                                     ///
		 1 "ff error - late dispute"                                 ///
		 2 "ff error - no symptoms"                                  ///
		 3 "ff error - symptoms/medication"                          ///
		 4 "ff error - symptoms(later)"

	* Full condition names (used in variable labels) and their two-letter
	* stubs (used in variable names). The two lists are position-matched:
	* word k of condnames describes word k of stubs.
	local condnames LungDisease Asthma Arthritis Osteoporosis Cancer Parkinsons
	local condnames `condnames' PsychiatricProblems Alzheimers Dementia BloodDisorder
	local condnames `condnames' Hypertension Angina HeartAttack HeartFailure HeartMurmur
	local condnames `condnames' HeartRhythm Diabetes Stroke Cholesterol
	local stubs lu as ar os ca pd ps ad de bl bp an mi hf hm hr di st ch

	* One all-missing indicator per condition and wave (0 to 7); the cleaning
	* steps further down fill in a howcl code wherever an observation is edited.
	local k = 0
	foreach c of local stubs {
		local ++k
		local cname : word `k' of `condnames'
		forvalues w = 0/7 {
			generate howcl`c'_w`w' = .
			label variable howcl`c'_w`w' "How prevalence of `cname' has been cleaned"
			label values howcl`c'_w`w' howcl
		}
	}
		
		
 /********************************/
 /**    dealing with 		    **/
 /**     feed forward errors    **/
 /********************************/  
 	
	/* 
	
	- previously this referred to people not being fed forward after they 
	  reported a condition. These people will already have been fed forward 
	  by the previous do-file. 
	  
	- A more problematic feed forward error is where a condition is fed 
	  forward despite NO PREVIOUS REPORT of the condition, and this is 
	  CONFIRMED. This is potentially troubling as we may see an artificial 
	  'first diagnosis'. 
	  
	- There appears to have been a screw-up with the feed-forwards for angina 
	  and heart attacks in waves 4/5/6. 
	  
	- In other conditions/waves there are at most 2/3 cases of this. 
	
	*/ 

	
** not using responses to 'do you still have it?' 	
	
	* Full condition names and their two-letter stubs, position-matched:
	* word k of condnames describes word k of stubs.
	local condnames LungDisease Asthma Arthritis Osteoporosis Cancer Parkinsons
	local condnames `condnames' PsychiatricProblems Alzheimers Dementia BloodDisorder
	local condnames `condnames' Hypertension Angina HeartAttack HeartFailure HeartMurmur
	local condnames `condnames' HeartRhythm Diabetes Stroke Cholesterol
	local stubs lu as ar os ca pd ps ad de bl bp an mi hf hm hr di st ch

	local k = 0
	foreach x of local stubs {
		local ++k
		local cname : word `k' of `condnames'

		* ffproblem is 1 for anyone hit by a ff error in any wave, else missing.
		quietly generate ffproblem`x' = .
		quietly label variable ffproblem`x' "Feed-forward error for `cname'"

		forvalues i = 2/7 {
			local prev = `i' - 1

			* The error pattern: nrep is 0 at the previous wave while ff is 1 at
			* this wave (presumably "no report so far" vs "fed forward" - confirm
			* against the do-file that builds nrep and ff).
			local fferr nrep`x'_w`prev' == 0 & ff`x'_w`i' == 1

			* a problem for how many?
			quietly count if `fferr'

			if r(N) > 0 {
				display in red "`r(N)' instances of ff error for `cname' in wave `i'"

				* Those subject to the problem are annoyingly not routed into
				* the question asking WHEN they were diagnosed, so neither a
				* year nor an age of diagnosis should be present. The do-file
				* stops here if that is violated.
				assert (yrd`x'_w`i' <= 0 | yrd`x'_w`i' == .)      ///
					 & (aged`x'_w`i' <= 0 | aged`x'_w`i' == .)    ///
					if `fferr'

				quietly replace ffproblem`x' = 1 if `fferr'
			}
		}
	}



	** there are twelve cases of ff problems
	** EXCLUDING angina and heart attacks.
	** deal with them manually...

	* Open the data browser on the flagged respondents, one condition at a time
	* (angina and heart attack are left out of this list on purpose).
	foreach c in lu as ar os ca pd ps ad de bl bp hf hm hr di st ch {
		count if ffproblem`c' == 1
		if r(N) > 0 {
			browse idauniq *`c'* *ask* if ffproblem`c' == 1
		}
	}
	
/* 	lab define howcl  -1 "n/a" 											///
					   1 "ff error - late dispute"  					///
					   2 "ff error - no symptoms" 						///
					   3 "ff error - symptoms/medication"				///		
					   4 "ff error - symptoms(later)"	  	*/			
		
	* Manual, case-by-case corrections for the non-cardiac conditions.
	* For each respondent the local "who" holds the if-qualifier selecting
	* that person; ever* is the lifetime-prevalence variable being corrected
	* and howcl* records which kind of correction was applied.

	** lung disease

		/* idauniq == 104314
				no diagnosis reported in w1, missing in waves 2 and 3,
				ff to w4, confirms, disputes (misdiagnosis) in w5,
				but then in w6 reports a diagnosis + medication.
				the issue appears to be one of timing.
				accept dispute, assume onset in w6 */

		local who if idauniq == 104314
			replace everlu_w4 = 0 `who'
			forvalues w = 4/5 {
				replace howcllu_w`w' = 1 `who'
			}


	** asthma

		/** idauniq == 117899
				proxy intvws in w1 and w3 (missing in w2)
				ff error in w3, confirmed and reports medication.
				when respondent returns in w4, w5 w6 and w7, ff and
				confirmed each time.
				Again, a timing issue but unclear when first diagnosis
				was. Set lifetime prevalence in waves 1 and 2 equal to
				missing.
				NOTE(review): the code below recodes w1 and w0, not w1 and
				w2 as this note says - confirm which is intended. */

		local who if idauniq == 117899
			forvalues w = 0/7 {
				replace howclas_w`w' = 3 `who' & alive_w`w' == 1
			}
			foreach w in 1 0 {
				replace everas_w`w' = -1 `who'
			}

		/** idauniq == 118935
				no reports in w0 or w1 (no proxy intvws)
				ff error in w2, confirmation but never ff later
				so never confirms again, and never reports asthma
				medication. set lifetime prevalence equal to zero. **/

		local who if idauniq == 118935
			forvalues w = 2/5 {
				replace everas_w`w' = 0 `who'
				replace howclas_w`w' = 2 `who'
			}


	** arthritis

		/** idauniq == 117899
			[as above]
				proxy intvws in w1 and w3 (missing in w2)
				ff error in w3, confirmed but never reports medication.
				disputes in w5 "never had it". Accept dispute. **/

		local who if idauniq == 117899
			forvalues w = 2/7 {
				replace everar_w`w' = 0 `who'
				replace howclar_w`w' = 1 `who'
			}

		/** idauniq == 108057
				No diagnosis reported in waves 1 to 6.
				FF error to w7, confirmed.
				No report of medication in any wave or date of diagnosis.
				Reject report (?) **/

		local who if idauniq == 108057
			replace everar_w7 = 0 `who'
			replace howclar_w7 = 2 `who'


	** osteoporosis

		/** idauniq == 104982
			No diagnosis reported in w0 or w1,
			missing in w2 and w3, ff error to w4, confirmed,
			disputed in w5 "never had it". Not fed forward again
			but diagnosis reported in w7.
			Accept dispute. Set onset to w7 **/

		local who if idauniq == 104982
			forvalues w = 2/6 {
				replace everos_w`w' = 0 `who'
				replace howclos_w`w' = 1 `who'
			}


	** psychiatric problems

		/** idauniq == 104770
			in all waves, no proxies. ff error into w4 confirmed.
			Not subsequently ff, so no new reports or ff or confirmations
			in subsequent waves. no treatment recorded. Missing w7.
			reject. **/

		local who if idauniq == 104770
			forvalues w = 4/6 {
				replace everps_w`w' = 0 `who'
				replace howclps_w`w' = 2 `who'
			}


	** hypertension

		/** idauniq == 104982
			no report in w1, absent in w2 and w3, ff into w4,
			confirmation but not recorded as hypertensive,
			reports no medication, never subsequently ff and
			so no more confirmations. reject. **/

		local who if idauniq == 104982
			forvalues w = 4/6 {
				replace everbp_w`w' = 0 `who'
				replace howclbp_w`w' = 2 `who'
			}

		/** idauniq == 117899
				[see above as well] recorded as hypertensive in w0
				but no diagnosis reported in w1 (proxy)
				ff error to w3, confirmation + reports high bp medication
				when respondent answers (no proxy) in w4, further confirmation
				and medication reported again.
				in w5 condition is ff and confirmed, but no medication reported
				in w6 the condition is not ff.
				accept positive prevalence, but I don't think we can know about
				waves 1 and 2. code prevalence to missing (-1) for w0 to w2. **/

		local who if idauniq == 117899
			forvalues w = 0/2 {
				replace everbp_w`w' = -1 `who'
			}
			forvalues w = 0/6 {
				replace howclbp_w`w' = 3 `who'
			}


	** stroke

		/** idauniq == 111214
				only present in w1, w6 and w7
				ff error to w6.
				confirmed in w6 and w7.
				reports having had a stroke in the last two years.
				accept (prevalence left as is, only the flag is set) **/

		local who if idauniq == 111214
			replace howclst_w6 = 3 `who'


	** cholesterol

		/** idauniq == 104770
				ff error into w4, reports no medication,
				no subsequent ff or confirmations.
				reject. **/

		local who if idauniq == 104770
			forvalues w = 4/6 {
				replace everch_w`w' = 0 `who'
				replace howclch_w`w' = 2 `who'
			}

		/** idauniq == 118270
				ff error into w5, reports taking medication to
				keep cholesterol low, proxy in w6 so doesn't reconfirm.
				accept onset (prevalence left as is, only the flag is set) **/

		local who if idauniq == 118270
			forvalues w = 5/6 {
				replace howclch_w`w' = 3 `who'
			}
			

	
	
	
	
	
	
	
	
	

	
	** 	NOW we deal with heart attacks and angina. 
	** 	the good news is that those who were ff were asked if they had had 
	** 	angina-type pain over the last two years, or if they had had a heart 
	** 	attack over the past two years. 
	
	**  some people dispute, others report symptoms, 
	**  others do BOTH. 
	
	** all complicated by the fact that those subject to the routing 
	** error seem to have unusually high rates of the other condition 
	** i.e. those misrouted in to the heart attack confirmation are 
	** disproportionately likely to have angina and vice versa. 
	
	* Overlap between the heart-attack and angina feed-forward flags.
	* Both flags are either 1 or missing, so without the -missing- option the
	* table can only ever contain the single cell where both flags equal 1.
	tabulate ffproblemmi ffprobleman, missing

	* Typology of the cardiac ff-error cases. The label text is exported to
	* the documentation tables by tabout below (spelling of label 3 corrected).
	lab define problemtype  1 "Only dispute" ///
							2 "No dispute and no symptoms" ///
							3 "Only symptoms" ///
							4 "Symptoms AND dispute"


	* Cumulative symptom indicators: symp*_w`i' is 1 once symptoms have been
	* reported at wave i or at any earlier wave, else 0. Wave 1 is an
	* all-missing placeholder so that the wave-2 expression has a lag to read.
			gen sympan_w1 = .
			gen sympmi_w1 = .

		forvalues i = 2/7 {
		local Pi = `i' - 1

		** symptoms of angina (heyraw == 1 at this wave, or carried forward)
			gen sympan_w`i' = heyraw`i' == 1 | sympan_w`Pi' == 1

		** symptoms of heart attack (henmmiw between 1 and 3 at this wave,
		** or carried forward)
			gen sympmi_w`i' = (henmmiw`i' >= 1 & henmmiw`i' <= 3) | sympmi_w`Pi' == 1
		}

		egen eversympan = anymatch(sympan_w?) , values(1)
		egen eversympmi = anymatch(sympmi_w?) , values(1)


	foreach x in mi an {

	** indicator for whether a wave is disputed (combined with ever`x'_w?)
	** disputed_w`w' = 1 if any LATER wave carries a dispute code (3, 4 or 5).

		forvalues w = 1/6 {
			local first = `w' + 1
			local later
			forvalues v = `first'/7 {
				local later `later' disp`x'_w`v'
			}
			egen disputed`x'_w`w' = anymatch(`later') , values(3 4 5)
		}

		* Wave 7 has no later wave, so it looks at its own dispute code and only
		* counts codes 3 and 5 (code 4 is excluded here, unlike the other waves).
		egen disputed`x'_w7 = anymatch(disp`x'_w7) , values(3 5)

		egen everdispute`x' = anymatch(disp`x'_w?) , values(3 4 5)

	** categorise respondents into different types of problem
	** (type 1 is assigned first and then overwritten by type 4 for those
	**  who also ever reported symptoms)

		gen problemtype`x'     = 1 if ffproblem`x' == 1 & everdispute`x'==1
		replace problemtype`x' = 2 if ffproblem`x' == 1 & everdispute`x'==0 & eversymp`x' == 0
		replace problemtype`x' = 3 if ffproblem`x' == 1 & everdispute`x'==0 & eversymp`x' == 1
		replace problemtype`x' = 4 if ffproblem`x' == 1 & everdispute`x'==1 & eversymp`x' == 1
		lab val problemtype`x' problemtype
	}


	/** for documentation **/

		* Note: this changes the working directory for the rest of the do-file.
		cd "P:\ELSA\Healthdynamics\documentation\tables\"

		tabout problemtypean using ffproblemtype_angina.txt , replace
		tabout problemtypemi using ffproblemtype_heartattack.txt , replace

		
	
 /*	lab define howcl  -1 "n/a" 											///
					   1 "ff error - late dispute"  					///
					   2 "ff error - no symptoms" 						///
					   3 "ff error - symptoms/medication"				///		
					   4 "ff error - symptoms(later)"	  			*/	
			
	
	
		** GROUP 1: LATE DISPUTES with no reports of symptoms
		** 		accept these disputes
		**
		** Statement order matters inside the loop: both howcl replaces read
		** ever*_w`i' before the third statement overwrites it.

		foreach x in mi an {
			forvalues i = 3/7 {
				local grp1 problemtype`x' == 1

				* condition was reported (1) or set to -1, and a later wave disputes it
				replace howcl`x'_w`i' = 1 if inlist(ever`x'_w`i', 1, -1) ///
											 & `grp1'                   ///
											 & disputed`x'_w`i' == 1

				* the wave that itself carries dispute code 3 or 5
				replace howcl`x'_w`i' = 1 if ever`x'_w`i' == 0 ///
											 & `grp1'           ///
											 & inlist(disp`x'_w`i', 3, 5)

				* withdraw the disputed report
				replace ever`x'_w`i' = 0 if ever`x'_w`i' == 1 ///
											& `grp1'           ///
											& disputed`x'_w`i' == 1

				count if howcl`x'_w`i' == 1
				display in red "`x' wave `i' cleaned `r(N)'"
			}
		}


		** GROUP 2: NO DISPUTES, ONLY SYMPTOMS (OR NO SYMPTOMS)

		foreach x in mi an {
			forvalues i = 3/7 {

				/** doesn't report symptoms ever: reject the report **/
				local nosymp eversymp`x' == 0 & problemtype`x' == 2

				replace howcl`x'_w`i' = 2 if `nosymp'
				replace ever`x'_w`i' = 0 if ever`x'_w`i' == 1 & `nosymp'


				/** reports symptoms (by this wave): accept the report **/
				local sympnow symp`x'_w`i' == 1 & problemtype`x' == 3

				replace howcl`x'_w`i' = 3 if `sympnow'
				replace ever`x'_w`i' = 1 if ever`x'_w`i' == 0 & `sympnow'


				/** reports symptoms (later): no prevalence until symptoms appear **/
				local symplater symp`x'_w`i' == 0 & eversymp`x' == 1 & problemtype`x' == 3

				replace howcl`x'_w`i' = 4 if `symplater'
				replace ever`x'_w`i' = 0 if ever`x'_w`i' == 1 & `symplater'

			}
		}
		
		
		/** two individuals fall into the category of not
			reporting any symptoms, but somehow are routed
			in to the question of WHEN they were diagnosed.
			make their lifetime prevalence consistent with
			these reported dates.
			howcl code -2 = "ff error - no symptoms BUT date of
			diagnosis reported". **/

			/** reports year of diagnosis as 2000 in w6:
				prevalence is positive in every wave from w1 on **/
			forv i = 1/7 {
				replace everan_w`i' = 1 	if idauniq == 103769
				replace howclan_w`i' = -2  if idauniq == 103769
			}

			/** reports year of diagnosis as 2010 in w5:
				no prevalence in w4, positive from w5 onwards.
				Wave 7 is now set as well: it is flagged with howcl -2
				below but was previously left at whatever the Group 2
				step produced, so lifetime prevalence could read 1, 1, 0
				across w5-w7. This mirrors the treatment of 103769 above.
				NOTE(review): confirm against the w7 record. **/

			replace everan_w4 = 0 if idauniq == 107852

			forv i = 5/7 {
				replace everan_w`i' = 1 if idauniq == 107852
			}

			forv i = 4/7 {
				replace howclan_w`i' = -2 if idauniq == 107852
			}
				
		
		** GROUP 3: DISPUTES AND SYMPTOMS
		**  	code manually.
		** ultimately we accept their disputes for angina.
		**
		** For each respondent the local "who" holds the if-qualifier that
		** selects that person.

		* Inspect the type-4 cases in the data browser before recoding them.
		foreach c in an mi {
			browse idauniq ever`c'* disp`c'* *symp`c'* if problemtype`c' == 4
		}


		** ANGINA **

		local who if idauniq == 100039

			/* ff error to w5, confirms, says 'still have it',
				and reports angina type pain in last two years.
				Then dispute in w6 (not prev but now).
				accept dispute and code onset as in w6. */

				replace everan_w5 = 0 `who'
				replace howclan_w5 = 1 `who'

		local who if idauniq == 118284

			/** ff error to w4, confirmation and reports angina-type
				pain in past two years.
				Dispute in w5 ("never had it")
				accept dispute. **/

				replace everan_w4 = 0 `who'

				forvalues w = 4/6 {
					replace howclan_w`w' = 1 `who'
				}

		local who if idauniq == 120725

			/** ff error to w5, confirmation and reports angina type pain
				in w5.
				In w6 disputes and says never had it.
				Accept dispute. **/

				replace everan_w5 = 0 `who'
				forvalues w = 5/6 {
					replace howclan_w`w' = 1 `who'
				}


		local who if idauniq == 160050

			/** ff error to w5, reports angina type pain in last two years
				but 'doesn't know' if still has it. Disputes in w6 saying
				never had it. accept dispute **/

				replace everan_w5 = 0 `who'
				forvalues w = 5/6 {
					replace howclan_w`w' = 1 `who'
				}


		** HEART ATTACKS **

		local who if idauniq == 110959

			/** ff error to w4 , dispute in w5 ("never had it"),
				but reports heart attack in w6.
				Accept dispute and leave in onset in w6 **/

			forvalues w = 4/5 {
				replace evermi_w`w' = 0 `who'
			}
			forvalues w = 4/5 {
				replace howclmi_w`w' = 1 `who'
			}


		local who if idauniq == 111842

			/** ff error to w4 , confirms in w4 AND w5,
				says had a heart attack in w4,
				dispute in w6 ("misdiagnosed").
				Accept dispute. **/

			forvalues w = 4/5 {
				replace evermi_w`w' = 0 `who'
			}
			forvalues w = 4/5 {
				replace howclmi_w`w' = 1 `who'
			}


		local who if idauniq == 111864

			/** ff error to w4 , reports heart attack in past two
				years, then confirms again in w5,
				dispute in w6 ("never had it")
				accept dispute (assume confusion with angina?) **/

			forvalues w = 4/6 {
				replace evermi_w`w' = 0 `who'
				replace howclmi_w`w' = 1 `who'
			}


		local who if idauniq == 117993

			/** ff error to w4, confirmation and
				reports heart attack in past two years.
				then dispute in w5 ("misdiagnosed")
				accept dispute. **/

			forvalues w = 4/5 {
				replace evermi_w`w' = 0 `who'
				replace howclmi_w`w' = 1 `who'
			}


		local who if idauniq == 119447

			/** ff error to w4 , dispute in w5 ("never had it")
				then heart attack reported in w6.
				Accept dispute, allow subsequent onset. **/

			forvalues w = 4/5 {
				replace evermi_w`w' = 0 `who'
				replace howclmi_w`w' = 1 `who'
			}

	
	
	/** everyone with a feed forward error
		should now have entirely consistent responses */

		* Visual check: cross-tabulate lifetime prevalence in each pair of
		* adjacent waves for the respondents flagged with a ff error.
		foreach x in an mi {
		forv i = 1/6 {
		local Ni = `i' + 1
			ta ever`x'_w`i' ever`x'_w`Ni'  if ffproblem`x'== 1

		}
		}

		* A preserve / recode -1 to missing / restore sequence used to sit
		* here. Nothing ran between the recode and the restore, so it had no
		* effect on the data or on any output file and has been removed.
		* The -1 codes are therefore still present in both saved files.

		* Slim file: respondent id plus the lifetime-prevalence variables only.
		preserve
		keep idauniq ever??_w?
		save "$healthsave\basic healthvars with ff errors solved_w7.dta", replace
		restore

		* Full file, including the howcl*, ffproblem*, symp* and dispute helpers.
		 save "$healthsave\with ff errors solved_w7.dta", replace

		* Documentation tables for angina and heart attack after cleaning:
		* prevalence by wave, number of missings by wave, and onset rates.
		* Everything happens inside preserve/restore, so the data in memory
		* are unchanged afterwards (the working directory is not restored).
		preserve

		cd "P:\ELSA\Healthdynamics\documentation\tables\"

		* Accumulators built up in the loop below. forkeep is now reset like
		* the other three, so a stale value cannot leak into the keep list.
		local forkeep
		local foreshape
		local fortabout
		local fortabout2

		foreach x in an mi {
			forv i = 1/7 {
				* blank out waves the respondent did not take part in
				replace ever`x'_w`i' = . if inwave`i'==0
				* negative codes count as missing: flag them, then blank them
				gen missing`x'_w`i' = ever`x'_w`i'<0
				replace ever`x'_w`i' = . if ever`x'_w`i'<0
				local forkeep `forkeep' ever`x'_w`i' missing`x'_w`i'
			}

		local foreshape `foreshape'   ever`x' missing`x'
		local fortabout `fortabout'   mean ever`x'
		local fortabout2 `fortabout2' sum missing`x'

		}

		* Reshape to one row per respondent-wave. The _w infix is dropped so
		* the wave number becomes the reshape suffix, and the existing variable
		* inwave is renamed so it does not clash with the inwave1-inwave7 stub.
		keep idauniq inwave* `forkeep'
		ren *_w? *?
		ren inwave inwaveall
		reshape long `foreshape' inwave , i(idauniq) j(wave)

		tabout wave using fferrorscorrected_prevalence_rates_by_wave_w7.txt , ///
			c(`fortabout') sum replace f(4)

		* NOTE(review): inwaveall==16 is used as the balanced-panel filter;
		* the coding of inwaveall is not visible in this file - confirm.
		tabout wave if inwaveall==16 ///
			using fferrorscorrected_prevalence_rates_by_wave_balanced_panel_w7.txt , ///
			c(`fortabout') sum replace f(4)

		tabout wave if inwaveall==16 ///
			using number_missings_fferrorscorrected_w7.txt , ///
			c(`fortabout2') sum replace f(0)

		* Onset = change in lifetime prevalence since the previous wave, with
		* negative changes set to 0. tmp marks rows where the respondent took
		* part in both this wave and the previous one.
		tsset idauniq wave
		gen newan = everan - L.everan
		replace newan = 0 if newan <0
		gen tmp = inwave==1 & L.inwave==1
		gen newmi = evermi - L.evermi
		replace newmi = 0 if newmi<0
		tabout wave if tmp == 1 using fferrorscorrected_onset_rates_by_wave_w7.txt , ///
			c(mean newan mean newmi count newmi) f(4) sum replace
		* NOTE(review): unlike the other outputs this filename has no _w7
		* suffix, so it may overwrite a table from an earlier wave's run.
		tabout wave if tmp == 1 & inwaveall==16 using fferrorscorrected_onset_rates_by_wave_balanced_panel.txt , ///
			c(mean newan mean newmi count newmi) f(4) sum replace

		restore
		
